python 爬取steam/csgo市场数据

您所在的位置:网站首页 steam 商场 python 爬取steam/csgo市场数据

python 爬取steam/csgo市场数据

2024-07-09 19:22| 来源: 网络整理| 查看: 265

在不登录的情况下频繁访问Steam市场,很容易被禁止访问。另外,由于Steam社区市场被墙了,需要挂VPN才能正常爬取数据。

首先挂好VPN,登录网页版Steam,得到cookies和user-agent的内容,然后复制到headers中的Cookie和user-agent位置上。设置好相关的路径后就可以运行了,得到的数据会保存到csv文件中。

CS:GO的每一种箱子都会有一个对应的ID,得到相应的ID然后保存到列表中就可以了。这里只写了获取箱子在售量和起价数据的程序,其他数据的获取原理上应该都差不多。

初学python,有什么错误请多多指教

# Scrape sell-order summaries for CS:GO cases from the Steam Community Market.
#
# For every id in ``case_id_list`` the script queries the
# ``itemordershistogram`` endpoint, extracts the number of active listings and
# the starting price, and appends two rows (counts, then prices) to
# ``case_data.csv``.  Fill in the ``Cookie`` header below with the cookies of a
# logged-in Steam web session before running.
import csv
import random
import sys
import time
from urllib.parse import urlencode

# Local Anaconda site-packages directory; appended before the third-party
# imports so they can be resolved from it.
sys.path.append(r'E:\anaconda\Lib\site-packages')

from pyquery import PyQuery as pq
import requests
from lxml import etree

base_url = 'https://steamcommunity.com/market/itemordershistogram?'

headers = {
    'Host': 'steamcommunity.com',
    'Referer': 'https://steamcommunity.com/market',
    'X-Requested-With': 'XMLHttpRequest',
    'Connection': 'keep-alive',
    'Cookie': ' ',
    'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Mobile Safari/537.36',
}

# Seconds to wait for Steam before treating a request as failed; without a
# timeout a stalled connection would hang the whole run.
REQUEST_TIMEOUT = 15

# Ids whose download failed; appended to by get_page().
retry_id_list = []


def get_page(case_id):
    """Download the order histogram for one market item.

    Args:
        case_id: The item's ``item_nameid`` on the Steam market.

    Returns:
        The decoded JSON payload on success.  On any failure (network error,
        timeout, non-200 status, undecodable body) the id is appended to
        ``retry_id_list`` and ``0`` is returned.
    """
    params = {
        'country': 'CN',
        'language': 'schinese',
        'currency': '23',  # currency code
        'item_nameid': case_id,
        'two_factor': 0,
    }
    url = base_url + urlencode(params)
    try:
        # requests.get() opens and closes its own session, so no session
        # object is leaked per call.
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        if response.status_code == 200:
            payload = response.json()
            print('successully get page')
            return payload
        print('error: ', ('unexpected HTTP status', response.status_code))
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a 200 response whose body is not valid JSON.
        print('error: ', e.args)
    retry_id_list.append(case_id)
    return 0


def parse_page(json):
    """Extract the listing count and starting price from a histogram payload.

    Args:
        json: Payload returned by get_page() (``0``/``None`` on failure).

    Returns:
        A ``(num, price)`` tuple of strings, or ``None`` when the payload is
        empty or its ``sell_order_summary`` does not contain the two expected
        ``<span>`` values.
    """
    if not json:
        return None
    sell_items = json.get('sell_order_summary')
    if not sell_items:
        return None
    html = etree.HTML(sell_items)
    if html is None:
        return None
    num_and_price = html.xpath('//span/text()')
    if len(num_and_price) < 2:
        return None
    num = num_and_price[0]
    # The price text is expected as "<symbol> <amount>"; keep only the amount,
    # falling back to the whole text when there is no separating space.
    price_parts = num_and_price[1].split(' ')
    price = price_parts[1] if len(price_parts) > 1 else price_parts[0]
    return num, price


def _format_count(count):
    """Convert a raw listing count to units of 10,000, as a string.

    Larger counts keep fewer decimals; counts of 1000 or less are rounded to
    four decimals (the full resolution) to avoid float noise.
    """
    count = int(count)
    if count > 1000000:
        digits = 1
    elif count > 10000:
        digits = 2
    elif count > 1000:
        digits = 3
    else:
        digits = 4
    return str(round(count * 0.0001, digits))


def _row_values(data):
    """Return the (count, price) cell texts for a parsed result, blank if missing."""
    if data is None:
        return ' ', ' '
    return _format_count(data[0]), data[1]


# Case labels below are the author's own short names for each id.
case_id_list = [
    # CS:GO, eSports 2013, Bravo, CS:GO 2, eSports 2013 Winter
    '1275323', '1269049', '1546282', '1913364', '15490345',
    # Winter Offensive, CS:GO 3, Phoenix, Huntsman
    '3438414', '6820494', '7177182', '8987853',
    # Breakout, eSports 2014 Summer, Vanguard, Chroma
    '14962905', '15490346', '23853214', '29205213',
    # Chroma 2, Falchion, Shadow, Revolver
    '40091990', '49359031', '67060949', '84444464',
    # Wildfire, Chroma 3, Gamma, Gamma 2
    '139654771', '149865785', '156110183', '165027636',
    # Glove, Spectrum, Hydra, Spectrum 2
    '175854202', '175880240', '175896275', '175917239',
    # Clutch, Horizon, Danger Zone, Prisma
    '175966708', '175999886', '176024744', '176042493',
]

# Output rows.  Column 0 holds the run date (counts row) and a blank (prices
# row); column i (1-based) holds the values for case_id_list[i - 1].
num_lst = []
price_lst = []
_time_ = time.strftime("%Y.%m.%d", time.localtime())  # current date
num_lst.append(_time_)
price_lst.append(' ')

# Row positions whose first download failed, parallel to retry_id_list.
retry_index_list = []


def main():
    """Fetch every case once, retry failed downloads once, then append to the CSV."""
    for row_index, case_id in enumerate(case_id_list, start=1):
        page = get_page(case_id)
        if page == 0:
            retry_index_list.append(row_index)
        num_text, price_text = _row_values(parse_page(page))
        num_lst.append(num_text)
        price_lst.append(price_text)
        time.sleep(random.randint(2, 6))

    # Snapshot the failures first: get_page() appends to retry_id_list again
    # when a retry fails, and iterating the live list would never terminate.
    pending = list(zip(retry_index_list, retry_id_list))
    for row_index, retry_id in pending:
        print('retrying')
        data = parse_page(get_page(retry_id))
        if data is not None:
            num_lst[row_index], price_lst[row_index] = _row_values(data)
        time.sleep(random.randint(2, 6))

    # newline='' lets the csv module control line endings (otherwise blank
    # rows appear between records on Windows).
    with open('case_data.csv', 'a', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(num_lst)
        writer.writerow(price_lst)


if __name__ == '__main__':
    main()


【本文地址】


今日新闻


推荐新闻


CopyRight 2018-2019 办公设备维修网 版权所有 豫ICP备15022753号-3